/*----------------------------------------------------*
       Project : Covid 19
       Purpose : Processing data for regressions
       updated : June 17 2020    17:23 IST
*-----------------------------------------------------*/

** SETTING UP
* Run under Stata 15 semantics and start from an empty session.
version 15
clear all
set more off
pause on
* Close any log that is still open; capture swallows the error raised when none is.
capture quietly log close

*========================================================================*

** LOADING DATASET
* Import the cleaned outcome survey.

* Project root on the original author's machine.
local path_LM "/Users/louis-maeljean/Dropbox (MIT)/West Bengal Information Campaign/AER_I/for_submission"

* Root used by every file reference below -- other users should change this.
local path "`path_LM'"

* Work from the project root and load the survey, discarding anything in memory.
cd "`path'"
use "`path'/data/outcome_survey.dta", clear

*========================================================================*

* Restrict the sample to respondents living in villages (resp_loc == 1;
* location type was self-reported by the respondents).
drop if resp_loc != 1

// Count = 1989

* Give the anonymised geography identifiers their short names.
rename (district_anon pincode_anon) (district pincode)

*================================================================*

* Build one indicator per experimental arm: 1 control and 8 treatments.
tabulate treatment, generate(T)

* tabulate numbers its dummies T1-T9. Shift every index down by one so that
* T0 is the control and T1-T8 are Treatment1-Treatment8. Renaming in ascending
* order frees each target name before it is needed.
forvalues k = 1/9 {
    local j = `k' - 1
    rename T`k' T`j'
}


* Label each treatment dummy with the content of its video (Video1-Video8, in order).
local k = 0
foreach content in "Hyg + Ext + Type1"    ///
                   "SD + Ext + Type1"     ///
                   "Hyg + No_Ext + Type1" ///
                   "SD + No_Ext + Type1"  ///
                   "Hyg + Ext + Type2"    ///
                   "Hyg + No_Ext + Type2" ///
                   "SD + Ext + Type2"     ///
                   "SD + No_Ext + Type2" {
    local ++k
    label variable T`k' "`content'"
}


*=============================================*

*generating dummies for treatment arms*
* Each indicator is 1 when treatment takes one of the listed codes and 0
* otherwise (including when treatment is missing).

generate SD    = inlist(treatment, 2, 4, 7, 8)

generate Hyg   = inlist(treatment, 1, 3, 5, 6)

generate Ext   = inlist(treatment, 1, 2, 5, 7)

generate Int   = inlist(treatment, 3, 4, 6, 8)

generate Type1 = inlist(treatment, 1, 2, 3, 4)

generate Type2 = inlist(treatment, 5, 6, 7, 8)


*==============================================*

*****************************************************************

* One dummy per survey date: survey_day1, survey_day2, ...
* (9 days of surveying happened, with gaps in between).
tabulate id_date, generate(survey_day)


*==============================================================
*** SECTION1 : DEMOGRAPHICS ***
*==============================================================

* Label the demographic variables; some of them are also used as controls
* in the regressions.

label variable resp_age "Respondent Age"
label variable resp_loc "Location Type"
label variable smartphone "Has a Smartphone?"
label variable whatsapp "Uses whatsapp? "


*==============================================================
*** SECTION2 : BELIEFS ***
*==============================================================

*-----------------------*
   *1--SYMPTOMS--*
*-----------------------*

* Self-reported answers: respondents named symptoms and surveyors ticked the
* matching options.

* Number of correct symptoms named (rowtotal treats missing items as 0).
local right_symptoms cough shortnessofbreath sorethroat fever lossoftaste lossofsmell ///
    diarrhea bodyaches headaches covidtoes conjunctivitis tiredness chestpain rash
egen symptom_right = rowtotal(`right_symptoms')


* NOTE - wrong symptoms are defined against WHO's list of symptoms: anything
* not in that list is assumed to be a wrong symptom.
local wrong_symptoms runnynose vomiting dizziness itching chills swellinginyourlegsandfeet
egen symptom_wrong = rowtotal(`wrong_symptoms')


*-----------------------*
   *2--PRECAUTIONS--*
*-----------------------*

* Single social-distancing flag: 1 if any of the categories implying social
* distancing was selected, 0 otherwise.

generate socialdistancing2 = (socialdistance == 1 | notgoingtocrowdedplaces == 1 | notgoingtolessercrowdedplaces == 1)



* Self-reported answers: respondents named precautions and surveyors ticked the
* matching options.

local right_precautions notgoingtowork wearingamask washinghands handsanitizer socialdistancing2 ///
    cleancloths nospit coversneezecough gloves notouchface
egen precaution_right = rowtotal(`right_precautions')

local wrong_precautions havingherbs drinkinghotwater drinkingchemicalsubstance drinkingalcohol ///
    takingantibiotics takingantimalarialmedication hotfood_noutside
egen precaution_wrong = rowtotal(`wrong_precautions')


********************************
 *generating combined variable*
********************************

* Total right / wrong symptoms and precautions listed by a respondent.
generate combined_right = symptom_right + precaution_right
generate combined_wrong = symptom_wrong + precaution_wrong

* Net knowledge score: right answers minus wrong answers.
generate net_knowledge = combined_right - combined_wrong


*============================================================
 *** SECTION3 - INTERACTIONS ***
*============================================================

* Reference period (in days) for some of the questions; dividing by it gives
* per-day values where needed.
generate days = 2

*----------------------------------------------*
*1--INTERACT WITHIN ENTITY (M_own_i)--*
*----------------------------------------------*


* Within-village interactions per day.
generate within_village_interactions = interact_within_entity / days
label variable within_village_interactions "Interactions in own village per day"


*---------------------------------------------*
*2--INTERACT ACROSS ENTITIES (M_ij)--*
*---------------------------------------------*


* Respondents were asked where they travelled (vill, town, city) in the past
* 2 days and about their interactions in those places.
* Someone who did not travel to location "x" in the past 2 days is assumed to
* have M_ix = 0, so system-missing counts are set to zero.

foreach dest in vill town city {
    replace another_`dest'_interact = 0 if another_`dest'_interact == .
}


* Village-village, village-town and village-city interactions.
* NOTE(review): the original comments call these "per day", but the values are
* copied without dividing by days (unlike within_village_interactions) --
* confirm which is intended.
generate M_vv = another_vill_interact

generate M_vt = another_town_interact

generate M_vc = another_city_interact


* Total interactions for someone from a village in the 2-day window.

generate total_interactions_vill = interact_within_entity + another_vill_interact + another_town_interact + another_city_interact


*------------------------------------------------*
*3--PROBABILITY OF TRAVEL (P_ij)--*
*------------------------------------------------*


* NOTE: the own-travel variable used in the main regressions is generated at
* the end of this section.


* How many people went out of the entity, according to the respondent.
* If no one went out, then no one went to a village/town/city.
foreach dest in village town city {
    replace people_wentout_`dest' = 0 if people_wentout == 0
}


* Per-day counts: total leaving the entity, then by destination.
generate total_went_out = people_wentout / days

generate went_to_vill = people_wentout_village / days

generate went_to_town = people_wentout_town / days

generate went_to_city = people_wentout_city / days


* Per-day count of people going out of the village.
generate travel_outside_vill = total_went_out

* Proportion going out of the village.
generate p0_v = total_went_out / people_live


* Probability of travel from the village to each destination type:
* p_vv (vill-vill), p_vt (vill-town), p_vc (vill-city).
* When nobody went out the ratio is 0/0, so the probability is set to 0.
foreach dest in vill town city {
    local tag = substr("`dest'", 1, 1)
    generate p_v`tag' = cond(total_went_out == 0, 0, p0_v * went_to_`dest' / total_went_out) if resp_loc == 1
}


***********************
*Own Travel Variables*
***********************

* where_traveled is a string; each flag is 1 when the corresponding digit
* appears anywhere in it (1 -> vill, 2 -> town, 3 -> city, 4 -> none).
generate travel_own_vill = strpos(where_traveled, "1") > 0
generate travel_own_town = strpos(where_traveled, "2") > 0
generate travel_own_city = strpos(where_traveled, "3") > 0
generate travel_own_none = strpos(where_traveled, "4") > 0

* 1 if the respondent travelled to any destination type; otherwise 0 for
* village respondents and missing for everyone else.
generate travel_own = cond(travel_own_vill == 1 | travel_own_town == 1 | travel_own_city == 1, 1, cond(resp_loc == 1, 0, .))

* Reference period: the past two days.
label variable travel_own "Respondent went out of village"

*-------------------------------*
*4--DISTANCE CUTOFFS--*
*-------------------------------*

* 0 is a code, not a distance: the respondent either did not know or no one
* went out in the recent past. Recode it to missing in each distance variable.
* Each variable is tested against its OWN value. The previous version tested
* far_village == 0 on the town and city lines, which could never be true once
* far_village had been recoded on the first line, so far_town and far_city
* were silently left untouched.
replace far_village = . if far_village == 0
replace far_town    = . if far_town == 0
replace far_city    = . if far_city == 0


*removing outliers
* Standardise far_village with its sample mean and sd (r() results of
* summarize), then blank values more than 2 sd above the mean. Only the upper
* tail is trimmed. The explicit non-missing check is required because a
* missing z-score compares greater than 2 in Stata.
qui sum far_village
qui gen sds = (far_village - r(mean))/r(sd)
replace far_village = . if sds > 2 & far_village != .
drop sds


*=====================================================================
 *** SECTION4 : HANDWASH & MASKS ***
*=====================================================================

*-----------------------------------------*
*1--MASKWEAR & HANDWASH numbers--*
*-----------------------------------------*


label variable masks_wear "percent people wearing masks in locality"
label variable typical_wearmask "typical person wearing mask when going out (10 times)"
label variable typical_handwash "handwash after coming back home (10 times)"

* Rescale the "out of 10" handwash answer by dividing by 10.
replace typical_handwash = typical_handwash / 10

*-----------------------------------------*
*2--MASK AGREE DISAGREE--*
*-----------------------------------------*


label variable resp_mask_wear "wears mask when going outside"
label variable wearmask_meet "If you wear a mask, you can meet and interact with people as you like"
label variable wearmask_judged "People will not judge me for wearing a mask"


* Rescale the percentage (divide by 100) and the "out of 10" answer (divide by 10).
replace masks_wear = masks_wear / 100
replace typical_wearmask = typical_wearmask / 10

* Community-level pooled mask variable: mean of the two rescaled measures.
generate comm_mask_rate = (masks_wear + typical_wearmask) / 2

* Index for health-preserving behaviours: mean of own mask wearing and the
* rescaled handwash measure.
generate mask_hw_index = (resp_mask_wear + typical_handwash) / 2

*========================================================================
 *** SECTION5 : SYMPTOMATIC & ASYMPTOMATIC ***
*========================================================================


label variable symptom_corona_have "Percent of symptomatic have infection"
label variable asymptom_corona_have "Percent of asymptomatic have infection"

* Respondents were asked how many people out of the total population they
* believe have covid; express that count as a percentage of people_live.
generate ppl_with_covid = corona_have * 100 / people_live

label variable ppl_with_covid  "belief about Percent cases in own locality"

* Rescale both percentage beliefs by dividing by 100.
foreach belief in symptom_corona_have asymptom_corona_have {
    replace `belief' = `belief' / 100
}


*========================================================================
 *** SECTION6 : SOCIAL LEARNING ***
*========================================================================

* Convert the advice counts to numeric if they were read in as strings.
destring giveadvise getadvise, replace


* Flag for either giving or getting advice/info.
generate ever_talk_yn = (giveadvise_yn == 1 | getadvise_yn == 1)


* Someone who did not give (get) advice gave to (got from) 0 people.
foreach dir in give get {
    replace `dir'advise = 0 if `dir'advise_yn == 0
}

* Total number of conversations about covid.
generate ever_talk = giveadvise + getadvise

label variable giveadvise_yn "Gave advise/information"
label variable getadvise_yn "Got advise/information"
label variable giveadvise "No. of people gave advise/info to"
label variable getadvise  "No. of people got advise/info from"
label variable ever_talk "Total number gave and got advise/info from"
label variable ever_talk_yn "Ever gave or got advise"

*=======================================================================
 *** SECTION7 : ASHA Knowledge ***
*=======================================================================

* Labels for the ASHA knowledge questions.
label variable asha_know "Do you know your ASHA"
label variable asha_phone_know "Do you have your ASHA's number"

*========================================================================
 *** SECTION8 : MIGRATION ***
*========================================================================

* Labels for the migration questions.
label variable hh_mem_back "Have migrant members from your house been able to come back? "
label variable hh_new "Have people come to stay with you? "
label variable new_from "From where did these people come?"
label variable tot_stay "How many people started living with you?"
label variable migrant_hh "How many HH (out of 10) in your area have had migrants come back"

*========================================================================
 *** SECTION9 : DIFFUSION ***
*========================================================================

* covid_watch records whether the respondent watched a video about covid.
* Initially the question asked about the past 3 days, then it switched to
* "since the day of broadcast". Only survey days with the relevant time window
* (days 1, 6, 7, 8 and 9) are kept; the answer is blanked on every other day.

replace covid_watch = . if !(survey_day1 == 1 | survey_day6 == 1 | survey_day7 == 1 | survey_day8 == 1 | survey_day9 == 1)

* For this set of questions statements were read out and the respondent was
* asked to rank importance from 1 to 10 (10 being highest).
local k = 0
foreach topic in "Hygiene" "Social Distancing" "Externality" "No Externality" "Type1" "Type2" {
    local ++k
    label variable important_`k' "`topic'"
}

*=======================================================================
 *** SECTION10 : JIO ACCESS ***
*========================================================================

* Start from the respondent's own Jio status.
gen jio_yn = jio_self_yn

* Respondent said "don't know" or gave no response: recode the household count
* to the sentinel -999 so it can never satisfy the "> 0" test below.
replace jio_hh_count = -999 if jio_hh_count == . | jio_hh_count == .d

* Either the respondent or someone in the family has Jio.
* Stata treats every missing value (., .a, ..., .z) as larger than any number,
* so "jio_hh_count > 0" alone would also be true for any extended missing code
* not recoded above; the explicit !missing() guard keeps such rows from being
* counted as Jio households.
replace jio_yn = 1 if jio_self_yn == 0 & jio_hh_count > 0 & !missing(jio_hh_count)

label var jio_yn "Respondent or at least someone in family has a jio connection"


************************************=========*********************************
************************************=========*********************************

* Pooled indicator for all the treatments: 0 for control rows (T0 == 1), 1 otherwise.
generate Treatment = (T0 != 1)
label variable Treatment "TREATMENT"

* Complement of Treatment.
generate Control = (Treatment != 1)

* Interactions of the pooled treatment with arm characteristics and Jio access.
generate TrtXHyg = Treatment * Hyg

generate TrtXInt = Treatment * Int

generate TrtXType2 = Treatment * Type2

generate TrtXjio = Treatment * jio_yn

* Treatment x survey-day interactions for plots with time interactions
* (makes the output look neater in outreg2 than entering Treatment#Date directly).

forvalues d = 1/9 {
    generate D`d' = Treatment * survey_day`d'    // treatment and day `d'
}


*---------------------------------------------------*
      *Winsorizing some of the relevant vars*
*---------------------------------------------------*

* NOTE(review): winsor is presumably the user-written SSC command; it must be
* installed before this section runs -- confirm.
local outcomes total_interactions_vill ever_talk

* p(0.025) versions, prefixed W: used for the appendix table.
foreach v of local outcomes {
    winsor `v', gen(W`v') p(0.025)
}

* p(0.05) versions, prefixed W2: used for the main table.
foreach v of local outcomes {
    winsor `v', gen(W2`v') p(0.05)
}


*** Save the processed data as input for the regressions.

save "`path'/data/outcomes_reg_input.dta", replace



************************************=========*********************************
************************************=========*********************************
* END
************************************=========*********************************
************************************=========*********************************




